<!doctype html>
<html lang="zh-Hans">

<head>
    <!-- Encoding is declared first so the parser knows it before reading the non-ASCII title. -->
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>oixan的博客</title>
    <link rel="stylesheet" href="../css/blog.css">
</head>

<body>
    <!-- Page banner: the navigation list followed by two lines of tagline text. -->
    <div class="head">
        <div class="headnav">
            <div class="naveframe">
                <!-- Three links (home, blog list, about); each href points one directory above this page. -->
                <ul class="navlist">
                    <li>
                        <a href="../index.html">
                            主页
                        </a>
                    </li>
                    <li>
                        <a href="../bloglist.html">
                            博客
                        </a>
                    </li>
                    <li>
                        <a href="../about.html">
                            关于
                        </a>
                    </li>
                </ul>
            </div>
        </div>

        <!-- Tagline shown in the banner, split across two separately classed lines. -->
        <div class="headtext">
            <div class="headtext1">世界很简单，人类很复杂。</div>
            <div class="headtext2">浮生若梦，寻寻觅觅。</div>
        </div>
    </div>

    <div class="contents-all">
        <div class="contents">

            <div class="frame">
                <div class="title">
                    <h3>python爬虫爬取美图片</h3>
                </div>
                <!-- Visible text is unchanged; the time element adds a machine-readable form of the date shown. -->
                <div class="dtime">时间：<time datetime="2018-06-11">2018-06-11</time></div>
                <div class="detail">

                        <!-- Python listing from the post: one paragraph per source line, indentation written as non-breaking-space/space pairs. -->
                        <p><strong>爬取唯美()图片</strong><br></p>
                        <p><strong><br></strong></p>
                        <p>import urllib.request</p>
                        <p>import re</p>
                        <p>import time</p>
                        <p>import os</p>
                        <p><br></p>
                        <p>def getPic():</p>
                        <p>&nbsp; &nbsp; url='http://www.meinv/page/'</p>
                        <p><br></p>
                        <p>&nbsp; &nbsp; path='E:/qxpicture/'</p>
                        <p>&nbsp; &nbsp; url=[url+"{a}".format(a=str(i)) for i in range(1,3)]</p>
                        <p>&nbsp; &nbsp; url[0]='http://www.meinv'</p>
                        <p>&nbsp; &nbsp; if not os.path.exists(path):</p>
                        <p>&nbsp; &nbsp; &nbsp; &nbsp; os.makedirs(path)</p>
                        <p>&nbsp; &nbsp; try:</p>
                        <p>&nbsp; &nbsp; &nbsp; &nbsp; for ur in url:</p>
                        <p>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; ##page=requests.get(ur).text</p>
                        <p>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; page=urllib.request.urlopen(ur)</p>
                        <p>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; pages=page.read().decode()</p>
                        <p>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; ##print(pages)</p>
                        <p>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; href=re.findall('&lt;div class="wrapinner"&gt;(.*?)&lt;div class="side"&gt;',pages,re.S)</p>
                        <p>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; ##print(href)</p>
                        <p>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; for href1 in href:</p>
                        <p>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; href=href1.replace('\r','').replace('\n','').replace('\t','')</p>
                        <p>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; href=re.findall('&lt;div class="txt"&gt;&lt;a href="(.*?)" target="_blank"&gt;',href)</p>
                        <p>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; #print(href)</p>
                        <p>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; for pageur in href:</p>
                        <p>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; html=urllib.request.urlopen(pageur)</p>
                        <p>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; htmls=html.read().decode()</p>
                        <p>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; src=re.findall('src="(.*?.jpg|.*?.png)" title=',htmls)</p>
                        <p>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; for link in src:</p>
                        <p>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; print(link)</p>
                        <p>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; if link[0:4]=='http':</p>
                        <p>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; urllib.request.urlretrieve(link,path+'%s.jpg'%time.time())</p>
                        <p>&nbsp; &nbsp; except urllib.error.HTTPError as e:</p>
                        <p>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;print(e.code)</p>
                        <p>&nbsp; &nbsp; &nbsp;&nbsp;</p>
                        <p>if __name__=='__main__':</p>
                        <p>&nbsp; &nbsp; getPic()&nbsp;&nbsp;</p>
                        <p><br></p>
                            <!-- Link text added: an anchor with no content renders nothing, so the archive could not be reached. -->
                            <a href="../img/blogimg/爬下来的部分图片.rar" download="爬下来的部分图片.rar">下载：爬下来的部分图片.rar</a>
                </div>
                <!-- "Back" control: the href is a no-op and the click calls showPg(), which is not defined in this file (presumably in ../js/fanhuiye.js; verify). -->
                <!-- NOTE(review): with no real destination this acts as a button; confirm the stylesheet before changing the element. -->
                <a href="javascript:void(0)" onclick="showPg()">返回</a>
            </div>


        </div>
    </div>
    <!-- Site footer with the copyright line. -->
    <div class="footer">
        <div class="footertext">©2018,oixan博客</div>
    </div>
    <!-- "Back to top" control; topFunction() is not defined in this file (presumably in the script loaded below; verify). -->
    <span id="backtop" onclick="topFunction()">返回顶部</span>
    <!-- Kept as the last child of body: a script element is not permitted after the closing body tag. -->
    <script src="../js/fanhuiye.js"></script>
</body>
</html>